{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ef344114-c02b-449b-8711-ab9be00c0080",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from getpass import getpass\n",
    "import pandas as pd\n",
    "from datetime import datetime, timedelta\n",
    "from elasticsearch import Elasticsearch, helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6baca258-b116-4bbc-9cba-2841cb754e46",
   "metadata": {},
   "outputs": [],
   "source": [
    "def connect_to_nasa():\n",
    "    \"\"\"Fetch NASA near-Earth-object feed data from seven days ago through today.\n",
    "\n",
    "    Prompts for the API key with ``getpass`` so it is not echoed or stored\n",
    "    in the notebook.\n",
    "\n",
    "    Returns:\n",
    "        The decoded JSON body of the feed response.\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: If the API answers with an error status.\n",
    "    \"\"\"\n",
    "    url = \"https://api.nasa.gov/neo/rest/v1/feed\"\n",
    "    nasa_api_key = getpass(\"NASA API Key: \")\n",
    "    # Read the clock once so both ends of the window come from the same day,\n",
    "    # and send date-only strings instead of letting requests stringify\n",
    "    # datetime objects (which would include time of day and microseconds).\n",
    "    today = datetime.now().date()\n",
    "    params = {\n",
    "        \"api_key\": nasa_api_key,\n",
    "        \"start_date\": (today - timedelta(days=7)).isoformat(),\n",
    "        \"end_date\": today.isoformat(),\n",
    "    }\n",
    "    response = requests.get(url, params=params, timeout=30)\n",
    "    # Fail here on an HTTP error rather than handing an error payload to\n",
    "    # the code that expects feed data.\n",
    "    response.raise_for_status()\n",
    "    return response.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f6e2cfce-505b-4e58-aabc-555d80ccee9e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prompt for the NASA API key and download the feed as decoded JSON.\n",
    "response = connect_to_nasa()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "6fcb5d7b-f0c7-4973-a005-c0a724447068",
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_df(response):\n",
    "    all_objects = []\n",
    "    for date, objects in response[\"near_earth_objects\"].items():\n",
    "        for obj in objects:\n",
    "            obj[\"close_approach_date\"] = date\n",
    "            all_objects.append(obj)\n",
    "    df = pd.json_normalize(all_objects)\n",
    "    return df.drop(\"close_approach_data\", axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "ad60bb3a-46ad-48d6-849d-d33457a7e0ea",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>neo_reference_id</th>\n",
       "      <th>name</th>\n",
       "      <th>nasa_jpl_url</th>\n",
       "      <th>absolute_magnitude_h</th>\n",
       "      <th>is_potentially_hazardous_asteroid</th>\n",
       "      <th>is_sentry_object</th>\n",
       "      <th>close_approach_date</th>\n",
       "      <th>links.self</th>\n",
       "      <th>estimated_diameter.kilometers.estimated_diameter_min</th>\n",
       "      <th>estimated_diameter.kilometers.estimated_diameter_max</th>\n",
       "      <th>estimated_diameter.meters.estimated_diameter_min</th>\n",
       "      <th>estimated_diameter.meters.estimated_diameter_max</th>\n",
       "      <th>estimated_diameter.miles.estimated_diameter_min</th>\n",
       "      <th>estimated_diameter.miles.estimated_diameter_max</th>\n",
       "      <th>estimated_diameter.feet.estimated_diameter_min</th>\n",
       "      <th>estimated_diameter.feet.estimated_diameter_max</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2137924</td>\n",
       "      <td>2137924</td>\n",
       "      <td>137924 (2000 BD19)</td>\n",
       "      <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n",
       "      <td>17.51</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>2024-02-16</td>\n",
       "      <td>http://api.nasa.gov/neo/rest/v1/neo/2137924?ap...</td>\n",
       "      <td>0.836672</td>\n",
       "      <td>1.870854</td>\n",
       "      <td>836.671502</td>\n",
       "      <td>1870.854353</td>\n",
       "      <td>0.519883</td>\n",
       "      <td>1.162495</td>\n",
       "      <td>2744.985330</td>\n",
       "      <td>6137.973796</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2355046</td>\n",
       "      <td>2355046</td>\n",
       "      <td>355046 (2006 SO19)</td>\n",
       "      <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n",
       "      <td>19.66</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>2024-02-16</td>\n",
       "      <td>http://api.nasa.gov/neo/rest/v1/neo/2355046?ap...</td>\n",
       "      <td>0.310853</td>\n",
       "      <td>0.695088</td>\n",
       "      <td>310.852938</td>\n",
       "      <td>695.088301</td>\n",
       "      <td>0.193155</td>\n",
       "      <td>0.431908</td>\n",
       "      <td>1019.858754</td>\n",
       "      <td>2280.473500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3092138</td>\n",
       "      <td>3092138</td>\n",
       "      <td>(1995 FO)</td>\n",
       "      <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n",
       "      <td>20.80</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>2024-02-16</td>\n",
       "      <td>http://api.nasa.gov/neo/rest/v1/neo/3092138?ap...</td>\n",
       "      <td>0.183889</td>\n",
       "      <td>0.411188</td>\n",
       "      <td>183.888672</td>\n",
       "      <td>411.187571</td>\n",
       "      <td>0.114263</td>\n",
       "      <td>0.255500</td>\n",
       "      <td>603.309311</td>\n",
       "      <td>1349.040631</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3274166</td>\n",
       "      <td>3274166</td>\n",
       "      <td>(2005 EL169)</td>\n",
       "      <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n",
       "      <td>22.04</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>2024-02-16</td>\n",
       "      <td>http://api.nasa.gov/neo/rest/v1/neo/3274166?ap...</td>\n",
       "      <td>0.103886</td>\n",
       "      <td>0.232295</td>\n",
       "      <td>103.885510</td>\n",
       "      <td>232.295062</td>\n",
       "      <td>0.064551</td>\n",
       "      <td>0.144341</td>\n",
       "      <td>340.831737</td>\n",
       "      <td>762.122933</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3743895</td>\n",
       "      <td>3743895</td>\n",
       "      <td>(2016 CK246)</td>\n",
       "      <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n",
       "      <td>21.74</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>2024-02-16</td>\n",
       "      <td>http://api.nasa.gov/neo/rest/v1/neo/3743895?ap...</td>\n",
       "      <td>0.119277</td>\n",
       "      <td>0.266710</td>\n",
       "      <td>119.276525</td>\n",
       "      <td>266.710417</td>\n",
       "      <td>0.074115</td>\n",
       "      <td>0.165726</td>\n",
       "      <td>391.327193</td>\n",
       "      <td>875.034205</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        id neo_reference_id                name  \\\n",
       "0  2137924          2137924  137924 (2000 BD19)   \n",
       "1  2355046          2355046  355046 (2006 SO19)   \n",
       "2  3092138          3092138           (1995 FO)   \n",
       "3  3274166          3274166        (2005 EL169)   \n",
       "4  3743895          3743895        (2016 CK246)   \n",
       "\n",
       "                                        nasa_jpl_url  absolute_magnitude_h  \\\n",
       "0  https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...                 17.51   \n",
       "1  https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...                 19.66   \n",
       "2  https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...                 20.80   \n",
       "3  https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...                 22.04   \n",
       "4  https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...                 21.74   \n",
       "\n",
       "   is_potentially_hazardous_asteroid  is_sentry_object close_approach_date  \\\n",
       "0                              False             False          2024-02-16   \n",
       "1                              False             False          2024-02-16   \n",
       "2                              False             False          2024-02-16   \n",
       "3                              False             False          2024-02-16   \n",
       "4                              False             False          2024-02-16   \n",
       "\n",
       "                                          links.self  \\\n",
       "0  http://api.nasa.gov/neo/rest/v1/neo/2137924?ap...   \n",
       "1  http://api.nasa.gov/neo/rest/v1/neo/2355046?ap...   \n",
       "2  http://api.nasa.gov/neo/rest/v1/neo/3092138?ap...   \n",
       "3  http://api.nasa.gov/neo/rest/v1/neo/3274166?ap...   \n",
       "4  http://api.nasa.gov/neo/rest/v1/neo/3743895?ap...   \n",
       "\n",
       "   estimated_diameter.kilometers.estimated_diameter_min  \\\n",
       "0                                           0.836672      \n",
       "1                                           0.310853      \n",
       "2                                           0.183889      \n",
       "3                                           0.103886      \n",
       "4                                           0.119277      \n",
       "\n",
       "   estimated_diameter.kilometers.estimated_diameter_max  \\\n",
       "0                                           1.870854      \n",
       "1                                           0.695088      \n",
       "2                                           0.411188      \n",
       "3                                           0.232295      \n",
       "4                                           0.266710      \n",
       "\n",
       "   estimated_diameter.meters.estimated_diameter_min  \\\n",
       "0                                        836.671502   \n",
       "1                                        310.852938   \n",
       "2                                        183.888672   \n",
       "3                                        103.885510   \n",
       "4                                        119.276525   \n",
       "\n",
       "   estimated_diameter.meters.estimated_diameter_max  \\\n",
       "0                                       1870.854353   \n",
       "1                                        695.088301   \n",
       "2                                        411.187571   \n",
       "3                                        232.295062   \n",
       "4                                        266.710417   \n",
       "\n",
       "   estimated_diameter.miles.estimated_diameter_min  \\\n",
       "0                                         0.519883   \n",
       "1                                         0.193155   \n",
       "2                                         0.114263   \n",
       "3                                         0.064551   \n",
       "4                                         0.074115   \n",
       "\n",
       "   estimated_diameter.miles.estimated_diameter_max  \\\n",
       "0                                         1.162495   \n",
       "1                                         0.431908   \n",
       "2                                         0.255500   \n",
       "3                                         0.144341   \n",
       "4                                         0.165726   \n",
       "\n",
       "   estimated_diameter.feet.estimated_diameter_min  \\\n",
       "0                                     2744.985330   \n",
       "1                                     1019.858754   \n",
       "2                                      603.309311   \n",
       "3                                      340.831737   \n",
       "4                                      391.327193   \n",
       "\n",
       "   estimated_diameter.feet.estimated_diameter_max  \n",
       "0                                     6137.973796  \n",
       "1                                     2280.473500  \n",
       "2                                     1349.040631  \n",
       "3                                      762.122933  \n",
       "4                                      875.034205  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Flatten the NASA response into a DataFrame and preview the first rows.\n",
    "df = create_df(response)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "d2bbb54b-0fbe-4c47-bb19-5286f8410779",
   "metadata": {},
   "outputs": [],
   "source": [
    "def connect_to_elastic():\n",
    "    \"\"\"Prompt for Elastic Cloud credentials and build a client from them.\n",
    "\n",
    "    The Cloud ID is requested first, then the API key; both are read with\n",
    "    ``getpass`` so neither is echoed.\n",
    "\n",
    "    Returns:\n",
    "        An ``Elasticsearch`` client constructed with the entered values.\n",
    "    \"\"\"\n",
    "    # Dict entries are evaluated top to bottom, which keeps the prompt order.\n",
    "    credentials = {\n",
    "        \"cloud_id\": getpass(\"Elastic Cloud ID: \"),\n",
    "        \"api_key\": getpass(\"Elastic API Key: \"),\n",
    "    }\n",
    "    return Elasticsearch(**credentials)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "8e04aa93-89c1-4719-9912-e09fa401dc0c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prompt for the Elastic credentials and keep the client for later cells.\n",
    "es = connect_to_elastic()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "a675884d-ef9f-4ddd-9c2f-72f577cd6785",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'asteroid_data_set'})"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "index_name = \"asteroid_data_set\"\n",
    "# Create the index only when it is missing so this cell can be re-run\n",
    "# without failing because the index already exists.\n",
    "if not es.indices.exists(index=index_name):\n",
    "    es.indices.create(index=index_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "91d13bd6-fef5-458d-a533-4508a2096186",
   "metadata": {},
   "outputs": [],
   "source": [
    "def doc_generator(df, index_name):\n",
    "    for index, document in df.iterrows():\n",
    "        yield {\n",
    "            \"_index\": index_name,\n",
    "            \"_id\": f\"{document['id']}\",\n",
    "            \"_source\": document.to_dict(),\n",
    "        }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "aa23b918-b7b7-4d61-975b-1d6d7e6ca2d4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(146, [])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Send every DataFrame row to the index through the bulk helper.\n",
    "helpers.bulk(es, doc_generator(df, index_name))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "62be67d5-d880-4d97-882c-b4403dcde4e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def updated_last(es, index_name):\n",
    "    query = {\n",
    "        \"size\": 0,\n",
    "        \"aggs\": {\"last_date\": {\"max\": {\"field\": \"close_approach_date\"}}},\n",
    "    }\n",
    "    response = es.search(index=index_name, body=query)\n",
    "    last_updated_date_string = response[\"aggregations\"][\"last_date\"][\"value_as_string\"]\n",
    "    datetime_obj = datetime.strptime(last_updated_date_string, \"%Y-%m-%dT%H:%M:%S.%fZ\")\n",
    "    return datetime_obj.strftime(\"%Y-%m-%d\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "7d3e9c0a-af38-4e29-92d6-afe6a1aa489b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-23\n"
     ]
    }
   ],
   "source": [
    "# Look up the newest close-approach date currently stored in the index.\n",
    "last_update_date = updated_last(es, index_name)\n",
    "print(last_update_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "c547f76a-8ecf-407a-abce-6295ce6f8445",
   "metadata": {},
   "outputs": [],
   "source": [
    "def update_new_data(df, es, last_update_date, index_name):\n",
    "    if isinstance(last_update_date, str):\n",
    "        last_update_date = datetime.strptime(last_update_date, \"%Y-%m-%d\")\n",
    "\n",
    "    last_update_date = pd.Timestamp(last_update_date).normalize()\n",
    "\n",
    "    if not df.empty and \"close_approach_date\" in df.columns:\n",
    "        df[\"close_approach_date\"] = pd.to_datetime(df[\"close_approach_date\"])\n",
    "\n",
    "    today = pd.Timestamp(datetime.now().date()).normalize()\n",
    "\n",
    "    if df is not None and not df.empty:\n",
    "        update_range = df.loc[\n",
    "            (df[\"close_approach_date\"] > last_update_date)\n",
    "            & (df[\"close_approach_date\"] < today)\n",
    "        ]\n",
    "        if not update_range.empty:\n",
    "            helpers.bulk(es, doc_generator(update_range, index_name))\n",
    "        else:\n",
    "            print(\"No new data to update.\")\n",
    "    else:\n",
    "        print(\"The DataFrame is None.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "c5f94741-47a7-4546-bc3d-5962d293f182",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No new data to update.\n",
      "2024-02-23\n"
     ]
    }
   ],
   "source": [
    "# Apply the incremental update, then print the newest indexed date.\n",
    "try:\n",
    "    if df is None:\n",
    "        raise ValueError(\"DataFrame is None. There may be a problem.\")\n",
    "    update_new_data(df, es, last_update_date, index_name)\n",
    "    print(updated_last(es, index_name))\n",
    "except Exception as e:\n",
    "    # Any failure is reported as a one-line message instead of a traceback.\n",
    "    print(f\"An error occurred: {e}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
